Read in, format, and visualize data.
# Read in target data
target_data <- readr::read_csv("https://data.ecoforecast.org/targets/terrestrial/terrestrial_30min-targets.csv.gz", guess_max = 1e6)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## time = col_datetime(format = ""),
## siteID = col_character(),
## nee = col_double(),
## le = col_double(),
## nee_sd_intercept = col_double(),
## nee_sd_slopeP = col_double(),
## nee_sd_slopeN = col_double(),
## le_sd_intercept = col_double(),
## le_sd_slopeP = col_double(),
## le_sd_slopeN = col_double(),
## vswc = col_double(),
## vswc_sd = col_double()
## )
# Read in gap-filled temperature data
temp_inSW_data <- read.csv(text = getURL("https://raw.githubusercontent.com/eco4cast-class-VT/VT_NEET/master/temp_inSW_gapfill.csv"))
# Convert time column from character to POSIXct
# Time zone in target data is UTC according to https://docs.google.com/document/d/1l7sxBk-z-GHTlk50rdxP0lPTwJzFJ2gykclINkMsWcc/edit
target_data$time <- ymd_hms(target_data$time, tz = "UTC")
temp_inSW_data$startDateTime <- ymd_hms(temp_inSW_data$startDateTime, tz = "UTC")
# Convert siteID column from character to factor
target_data$siteID <- as.factor(target_data$siteID)
temp_inSW_data$siteID <- as.factor(temp_inSW_data$siteID)
# Plot all data
ggplot(data = target_data) +
geom_point(aes(x = time, y = nee, color = siteID)) +
facet_grid(siteID ~ .) +
labs(title = "Full Time Period: 01 Feb 2017 - 31 Mar 2021",
x = "Time (UTC)",
y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
theme_bw() +
theme(legend.position = "none")
# Plot data before forecast time period
ggplot(data = filter(target_data,
time >= "2021-03-01",
time < "2021-04-01")) +
geom_point(aes(x = time, y = nee, color = siteID)) +
facet_grid(siteID ~ .) +
labs(title = "Example Time Period: 01 Mar 2021 - 31 Mar 2021",
x = "Time (UTC)",
y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
theme_bw() +
theme(legend.position = "none")
Construct dynamic linear model (DLM) for NIMBLE.
We used the following model structure:
\[NEE(t) = \beta_1 \times NEE(t-1) + \beta_2 \times \Delta SW_{incoming} + \beta_3 \times \Delta temp + \beta_4\]
TempDLM <- nimbleCode({
# Note:
# x = real NEE (state variable)
# y = observed NEE
# Priors
x[1] ~ dnorm(x_ic, sd = sd_ic)
sd_add ~ dunif(0, 100)
b1 ~ dunif(0, 1) # "Uncertainty stabilization" parameter
b2 ~ dunif(-10, 0) # Incoming SW radiation effect coefficient
b3 ~ dunif(0, 10) # Temperature effect coefficient
b4 ~ dunif(-50, 50) # Intercept
# Process model
for(t in 2:n){
pred[t] <- b1 * x[t - 1] + b2 * (inSW[t] - inSW[t - 1]) + b3 * (temp[t] - temp[t - 1]) + b4
x[t] ~ dnorm(pred[t], sd = sd_add)
}
# Data model
for(t in 1:n){
y[t] ~ dnorm(x[t], sd = sd_obs[t])
}
})
Because of the size of the data set, a subset of the data had to be used for model fitting. For forecasting, the data subset consisted of data from the time period immediately before the forecast period. The variables controlling the size of the data subset are specified below.
# Note: B/c of size of data set, need to use subset of data
subset_length_days <- 21 # Length of subset [d]
subset_length_timesteps <- subset_length_days * 24 * 2 # Number of time steps in subset (days x hours/day x half hours/hour)
Specify data from BART to be used for model fitting.
# Separate data by site
target_data_BART <- filter(target_data, siteID == "BART")
temp_data_BART <- filter(temp_inSW_data, siteID == "BART")
# Scale data
sd_temp_data_BART <- sd(temp_data_BART$temp)
sd_inSW_data_BART <- sd(temp_data_BART$inSW)
temp_data_BART$temp <- temp_data_BART$temp/sd_temp_data_BART
temp_data_BART$inSW <- temp_data_BART$inSW/sd_inSW_data_BART
# Specify time period end date and time
end_date_time_BART <- ymd_hms("2021-03-31 23:30:00", tz = "UTC")
# Match time period end date and time to index value
end_i_BART <- match(end_date_time_BART, target_data_BART$time)
# Calculate time period start data and time index value
start_i_BART <- end_i_BART - subset_length_timesteps + 1
# Subset data
target_data_BART_sub <- target_data_BART[start_i_BART:end_i_BART,]
# Interpolate NEE values to estimate sd_obs
nee_interp_BART_sub <- approx(x = target_data_BART_sub$time[!is.na(target_data_BART_sub$nee)],
y = target_data_BART_sub$nee[!is.na(target_data_BART_sub$nee)],
xout = target_data_BART_sub$time,
method = "linear",
yleft = mean(target_data_BART_sub$nee, na.rm = TRUE),
yright = mean(target_data_BART_sub$nee, na.rm = TRUE),
na.rm = TRUE)
sd_obs_BART_sub <- rep(NA, length(target_data_BART_sub$nee)) # Initialize sd_obs vector
for(i in 1:length(sd_obs_BART_sub)){
if(nee_interp_BART_sub$y[i] >= 0){ # Calculate sd_obs if NEE is positive
sd_obs_BART_sub[i] <- target_data_BART_sub$nee_sd_intercept[1] + target_data_BART_sub$ nee_sd_slopeP[1] * nee_interp_BART_sub$y[i] # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
}else{ # Calculate sd_obs if NEE is negative
sd_obs_BART_sub[i] <- target_data_BART_sub$nee_sd_intercept[1] + target_data_BART_sub$ nee_sd_slopeN[1] * nee_interp_BART_sub$y[i]
}
}
# Match temperature data to subset NEE data
BART_temp_i <- match(target_data_BART_sub$time, temp_data_BART$startDateTime)
temp_data_BART_sub <- temp_data_BART[BART_temp_i,]
# Specify sd_obs modifier (to account for lower NEE in winter)
sd_obs_mod_BART <- 0.75
# Specify data
data_TempDLM_BART <- list(y = target_data_BART_sub$nee,
inSW = temp_data_BART_sub$inSW,
temp = temp_data_BART_sub$temp,
sd_obs = sd_obs_BART_sub * sd_obs_mod_BART)
Specify data from KONZ to be used for model fitting.
# Separate data by site
target_data_KONZ <- filter(target_data, siteID == "KONZ")
temp_data_KONZ <- filter(temp_inSW_data, siteID == "KONZ")
# Scale data
sd_temp_data_KONZ <- sd(temp_data_KONZ$temp)
sd_inSW_data_KONZ <- sd(temp_data_KONZ$inSW)
temp_data_KONZ$temp <- temp_data_KONZ$temp/sd_temp_data_KONZ
temp_data_KONZ$inSW <- temp_data_KONZ$inSW/sd_inSW_data_KONZ
# Specify time period end date and time
end_date_time_KONZ <- ymd_hms("2021-03-31 23:30:00", tz = "UTC")
# Match time period end date and time to index value
end_i_KONZ <- match(end_date_time_KONZ, target_data_KONZ$time)
# Calculate time period start data and time index value
start_i_KONZ <- end_i_KONZ - subset_length_timesteps + 1
# Subset data
target_data_KONZ_sub <- target_data_KONZ[start_i_KONZ:end_i_KONZ,]
# Interpolate NEE values to estimate sd_obs
nee_interp_KONZ_sub <- approx(x = target_data_KONZ_sub$time[!is.na(target_data_KONZ_sub$nee)],
y = target_data_KONZ_sub$nee[!is.na(target_data_KONZ_sub$nee)],
xout = target_data_KONZ_sub$time,
method = "linear",
yleft = mean(target_data_KONZ_sub$nee, na.rm = TRUE),
yright = mean(target_data_KONZ_sub$nee, na.rm = TRUE),
na.rm = TRUE)
sd_obs_KONZ_sub <- rep(NA, length(target_data_KONZ_sub$nee)) # Initialize sd_obs vector
for(i in 1:length(sd_obs_KONZ_sub)){
if(nee_interp_KONZ_sub$y[i] >= 0){ # Calculate sd_obs if NEE is positive
sd_obs_KONZ_sub[i] <- target_data_KONZ_sub$nee_sd_intercept[1] + target_data_KONZ_sub$ nee_sd_slopeP[1] * nee_interp_KONZ_sub$y[i] # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
}else{ # Calculate sd_obs if NEE is negative
sd_obs_KONZ_sub[i] <- target_data_KONZ_sub$nee_sd_intercept[1] + target_data_KONZ_sub$ nee_sd_slopeN[1] * nee_interp_KONZ_sub$y[i]
}
}
# Match temperature data to subset NEE data
KONZ_temp_i <- match(target_data_KONZ_sub$time, temp_data_KONZ$startDateTime)
temp_data_KONZ_sub <- temp_data_KONZ[KONZ_temp_i,]
# Specify sd_obs modifier (to account for lower NEE in winter)
sd_obs_mod_KONZ <- 0.15
# Specify data
data_TempDLM_KONZ <- list(y = target_data_KONZ_sub$nee,
inSW = temp_data_KONZ_sub$inSW,
temp = temp_data_KONZ_sub$temp,
sd_obs = sd_obs_KONZ_sub * sd_obs_mod_KONZ)
Specify data from OSBS to be used for model fitting.
# Separate data by site
target_data_OSBS <- filter(target_data, siteID == "OSBS")
temp_data_OSBS <- filter(temp_inSW_data, siteID == "OSBS")
# Scale data
sd_temp_data_OSBS <- sd(temp_data_OSBS$temp)
sd_inSW_data_OSBS <- sd(temp_data_OSBS$inSW)
temp_data_OSBS$temp <- temp_data_OSBS$temp/sd_temp_data_OSBS
temp_data_OSBS$inSW <- temp_data_OSBS$inSW/sd_inSW_data_OSBS
# Specify time period end date and time
end_date_time_OSBS <- ymd_hms("2021-03-31 23:30:00", tz = "UTC")
# Match time period end date and time to index value
end_i_OSBS <- match(end_date_time_OSBS, target_data_OSBS$time)
# Calculate time period start data and time index value
start_i_OSBS <- end_i_OSBS - subset_length_timesteps + 1
# Subset data
target_data_OSBS_sub <- target_data_OSBS[start_i_OSBS:end_i_OSBS,]
# Interpolate NEE values to estimate sd_obs
nee_interp_OSBS_sub <- approx(x = target_data_OSBS_sub$time[!is.na(target_data_OSBS_sub$nee)],
y = target_data_OSBS_sub$nee[!is.na(target_data_OSBS_sub$nee)],
xout = target_data_OSBS_sub$time,
method = "linear",
yleft = mean(target_data_OSBS_sub$nee, na.rm = TRUE),
yright = mean(target_data_OSBS_sub$nee, na.rm = TRUE),
na.rm = TRUE)
sd_obs_OSBS_sub <- rep(NA, length(target_data_OSBS_sub$nee)) # Initialize sd_obs vector
for(i in 1:length(sd_obs_OSBS_sub)){
if(nee_interp_OSBS_sub$y[i] >= 0){ # Calculate sd_obs if NEE is positive
sd_obs_OSBS_sub[i] <- target_data_OSBS_sub$nee_sd_intercept[1] + target_data_OSBS_sub$ nee_sd_slopeP[1] * nee_interp_OSBS_sub$y[i] # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
}else{ # Calculate sd_obs if NEE is negative
sd_obs_OSBS_sub[i] <- target_data_OSBS_sub$nee_sd_intercept[1] + target_data_OSBS_sub$ nee_sd_slopeN[1] * nee_interp_OSBS_sub$y[i]
}
}
# Match temperature data to subset NEE data
OSBS_temp_i <- match(target_data_OSBS_sub$time, temp_data_OSBS$startDateTime)
temp_data_OSBS_sub <- temp_data_OSBS[OSBS_temp_i,]
# Specify sd_obs modifier (generally not needed for OSBS b/c of relatively high NEE in winter)
sd_obs_mod_OSBS <- 0.85
# Specify data
data_TempDLM_OSBS <- list(y = target_data_OSBS_sub$nee,
inSW = temp_data_OSBS_sub$inSW,
temp = temp_data_OSBS_sub$temp,
sd_obs = sd_obs_OSBS_sub * sd_obs_mod_OSBS)
Specify data from SRER to be used for model fitting.
# Separate data by site
target_data_SRER <- filter(target_data, siteID == "SRER")
temp_data_SRER <- filter(temp_inSW_data, siteID == "SRER")
# Scale data
sd_temp_data_SRER <- sd(temp_data_SRER$temp)
sd_inSW_data_SRER <- sd(temp_data_SRER$inSW)
temp_data_SRER$temp <- temp_data_SRER$temp/sd_temp_data_SRER
temp_data_SRER$inSW <- temp_data_SRER$inSW/sd_inSW_data_SRER
# Specify time period end date and time
end_date_time_SRER <- ymd_hms("2021-03-31 23:30:00", tz = "UTC")
# Match time period end date and time to index value
end_i_SRER <- match(end_date_time_SRER, target_data_SRER$time)
# Calculate time period start data and time index value
start_i_SRER <- end_i_SRER - subset_length_timesteps + 1
# Subset data
target_data_SRER_sub <- target_data_SRER[start_i_SRER:end_i_SRER,]
# Interpolate NEE values to estimate sd_obs
nee_interp_SRER_sub <- approx(x = target_data_SRER_sub$time[!is.na(target_data_SRER_sub$nee)],
y = target_data_SRER_sub$nee[!is.na(target_data_SRER_sub$nee)],
xout = target_data_SRER_sub$time,
method = "linear",
yleft = mean(target_data_SRER_sub$nee, na.rm = TRUE),
yright = mean(target_data_SRER_sub$nee, na.rm = TRUE),
na.rm = TRUE)
sd_obs_SRER_sub <- rep(NA, length(target_data_SRER_sub$nee)) # Initialize sd_obs vector
for(i in 1:length(sd_obs_SRER_sub)){
if(nee_interp_SRER_sub$y[i] >= 0){ # Calculate sd_obs if NEE is positive
sd_obs_SRER_sub[i] <- target_data_SRER_sub$nee_sd_intercept[1] + target_data_SRER_sub$ nee_sd_slopeP[1] * nee_interp_SRER_sub$y[i] # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
}else{ # Calculate sd_obs if NEE is negative
sd_obs_SRER_sub[i] <- target_data_SRER_sub$nee_sd_intercept[1] + target_data_SRER_sub$ nee_sd_slopeN[1] * nee_interp_SRER_sub$y[i]
}
}
# Match temperature data to subset NEE data
SRER_temp_i <- match(target_data_SRER_sub$time, temp_data_SRER$startDateTime)
temp_data_SRER_sub <- temp_data_SRER[SRER_temp_i,]
# Specify sd_obs modifier (to account for lower NEE in winter)
sd_obs_mod_SRER <- 0.25
# Specify data
data_TempDLM_SRER <- list(y = target_data_SRER_sub$nee,
inSW = temp_data_SRER_sub$inSW,
temp = temp_data_SRER_sub$temp,
sd_obs = sd_obs_SRER_sub * sd_obs_mod_SRER)
Specify constants for NIMBLE.
# Specify constants
constants_BART <- list(n = length(target_data_BART_sub$nee),
x_ic = 0,
sd_ic = 1)
constants_KONZ <- list(n = length(target_data_KONZ_sub$nee),
x_ic = 0,
sd_ic = 1)
constants_OSBS <- list(n = length(target_data_OSBS_sub$nee),
x_ic = 0,
sd_ic = 1)
constants_SRER <- list(n = length(target_data_SRER_sub$nee),
x_ic = 0,
sd_ic = 1)
Specify initial conditions for temperature DLM.
# Specify number of chains
nchains = 3
# Initialize initial condition lists
inits_TempDLM_BART <- list()
inits_TempDLM_KONZ <- list()
inits_TempDLM_OSBS <- list()
inits_TempDLM_SRER <- list()
# Generate initial conditions
for(i in 1:nchains){
# BART
y.samp <- sample(nee_interp_BART_sub$y, length(nee_interp_BART_sub$y), replace = TRUE)
inits_TempDLM_BART[[i]] <- list(sd_add = sd(diff(y.samp), na.rm = TRUE),
x = nee_interp_BART_sub$y)
# KONZ
y.samp <- sample(nee_interp_KONZ_sub$y, length(nee_interp_KONZ_sub$y), replace = TRUE)
inits_TempDLM_KONZ[[i]] <- list(sd_add = sd(diff(y.samp), na.rm = TRUE),
x = nee_interp_KONZ_sub$y)
# OSBS
y.samp <- sample(nee_interp_OSBS_sub$y, length(nee_interp_OSBS_sub$y), replace = TRUE)
inits_TempDLM_OSBS[[i]] <- list(sd_add = sd(diff(y.samp), na.rm = TRUE),
x = nee_interp_OSBS_sub$y)
# SRER
y.samp <- sample(nee_interp_SRER_sub$y, length(nee_interp_SRER_sub$y), replace = TRUE)
inits_TempDLM_SRER[[i]] <- list(sd_add = sd(diff(y.samp), na.rm = TRUE),
x = nee_interp_SRER_sub$y)
}
Run NIMBLE for temperature DLM.
# Preliminaries
niter <- 11000
nthin <- 1
# BART
nimble_out_TempDLM_BART <- nimbleMCMC(code = TempDLM,
data = data_TempDLM_BART,
inits = inits_TempDLM_BART,
constants = constants_BART,
monitors = c("b1",
"b2",
"b3",
"b4",
"sd_add",
"x",
"y"),
niter = niter,
nchains = nchains,
thin = nthin,
samplesAsCodaMCMC = TRUE)
## NAs were detected in model variables: b1, logProb_b1, b2, logProb_b2, b3, logProb_b3, b4, logProb_b4, pred, y, logProb_y, logProb_x.
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
# KONZ
nimble_out_TempDLM_KONZ <- nimbleMCMC(code = TempDLM,
data = data_TempDLM_KONZ,
inits = inits_TempDLM_KONZ,
constants = constants_KONZ,
monitors = c("b1",
"b2",
"b3",
"b4",
"sd_add",
"x",
"y"),
niter = niter,
nchains = nchains,
thin = nthin,
samplesAsCodaMCMC = TRUE)
## NAs were detected in model variables: b1, logProb_b1, b2, logProb_b2, b3, logProb_b3, b4, logProb_b4, pred, y, logProb_y, logProb_x.
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
# OSBS
nimble_out_TempDLM_OSBS <- nimbleMCMC(code = TempDLM,
data = data_TempDLM_OSBS,
inits = inits_TempDLM_OSBS,
constants = constants_OSBS,
monitors = c("b1",
"b2",
"b3",
"b4",
"sd_add",
"x",
"y"),
niter = niter,
nchains = nchains,
thin = nthin,
samplesAsCodaMCMC = TRUE)
## NAs were detected in model variables: b1, logProb_b1, b2, logProb_b2, b3, logProb_b3, b4, logProb_b4, pred, logProb_x, y, logProb_y.
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
# SRER
nimble_out_TempDLM_SRER <- nimbleMCMC(code = TempDLM,
data = data_TempDLM_SRER,
inits = inits_TempDLM_SRER,
constants = constants_SRER,
monitors = c("b1",
"b2",
"b3",
"b4",
"sd_add",
"x",
"y"),
niter = niter,
nchains = nchains,
thin = nthin,
samplesAsCodaMCMC = TRUE)
## NAs were detected in model variables: b1, logProb_b1, b2, logProb_b2, b3, logProb_b3, b4, logProb_b4, pred, logProb_x, y, logProb_y.
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
Visualize chains and remove burn-in for temperature DLM.
# BART
# plot(nimble_out_TempDLM_BART[, c("b1")], main = "BART: b1") # Visualize all chains
# plot(nimble_out_TempDLM_BART[, c("sd_add")], main = "BART: sd_add") # Visualize all chains
# burnin_TempDLM_BART <- 2000 # Burn-in
# nimble_burn_TempDLM_BART <- window(nimble_out_TempDLM_BART, start = burnin_TempDLM_BART) # Exclude burn-in
# plot(nimble_burn_TempDLM_BART[, c("b1")], main = "BART: b1 (burn-in removed)") # Visualize chains w/o burn-in
# plot(nimble_burn_TempDLM_BART[, c("sd_add")], main = "BART: sd_add (burn-in removed)") # Visualize chains w/o burn-in
#
# # KONZ
#
# plot(nimble_out_TempDLM_KONZ[, c("b1")], main = "KONZ: b1") # Visualize all chains
# plot(nimble_out_TempDLM_KONZ[, c("sd_add")], main = "KONZ: sd_add") # Visualize all chains
# burnin_TempDLM_KONZ <- 2000 # Burn-in
# nimble_burn_TempDLM_KONZ <- window(nimble_out_TempDLM_KONZ, start = burnin_TempDLM_KONZ) # Exclude burn-in
# plot(nimble_burn_TempDLM_KONZ[, c("b1")], main = "KONZ: b1 (burn-in removed)") # Visualize chains w/o burn-in
# plot(nimble_burn_TempDLM_KONZ[, c("sd_add")], main = "KONZ: sd_add (burn-in removed)") # Visualize chains w/o burn-in
#
# # OSBS
#
# plot(nimble_out_TempDLM_OSBS[, c("b1")], main = "OSBS: b1") # Visualize all chains
# plot(nimble_out_TempDLM_OSBS[, c("sd_add")], main = "OSBS: sd_add") # Visualize all chains
# burnin_TempDLM_OSBS <- 2000 # Burn-in
# nimble_burn_TempDLM_OSBS <- window(nimble_out_TempDLM_OSBS, start = burnin_TempDLM_OSBS) # Exclude burn-in
# plot(nimble_burn_TempDLM_OSBS[, c("b1")], main = "OSBS: b1 (burn-in removed)") # Visualize chains w/o burn-in
# plot(nimble_burn_TempDLM_OSBS[, c("sd_add")], main = "OSBS: sd_add (burn-in removed)") # Visualize chains w/o burn-in
#
# # SRER
#
# plot(nimble_out_TempDLM_SRER[, c("b1")], main = "SRER: b1") # Visualize all chains
# plot(nimble_out_TempDLM_SRER[, c("sd_add")], main = "SRER: sd_add") # Visualize all chains
# burnin_TempDLM_SRER <- 2000 # Burn-in
# nimble_burn_TempDLM_SRER <- window(nimble_out_TempDLM_SRER, start = burnin_TempDLM_SRER) # Exclude burn-in
# plot(nimble_burn_TempDLM_SRER[, c("b1")], main = "SRER: b1 (burn-in removed)") # Visualize chains w/o burn-in
# plot(nimble_burn_TempDLM_SRER[, c("sd_add")], main = "SRER: sd_add (burn-in removed)") # Visualize chains w/o burn-in
#Plot the NIMBLE chains:
PlotChain = function(siteID, nimbleOutput, burnin = 2000)
{
#nimble_out_TempDLM_SRER = nimbleOutput
#nimble_burn_TempDLM_SRER = nimbleMinusBurn
#Visualize all chains:
plot(nimbleOutput[, c("b1")], main = paste(siteID, ": b1", sep = ''))
plot(nimbleOutput[, c("b2")], main = paste(siteID, ": b2", sep = ''))
plot(nimbleOutput[, c("b3")], main = paste(siteID, ": b3", sep = ''))
plot(nimbleOutput[, c("b4")], main = paste(siteID, ": b4", sep = ''))
plot(nimbleOutput[, c("sd_add")], main = paste(siteID, ": sd_add", sep = ''))
# Exclude burn-in:
nimbleMinusBurn <- window(nimbleOutput, start = burnin)
#Visualize chains w/o burn-in
#plot(nimbleMinusBurn[, c("b1")], main = "SRER: b1 (burn-in removed)")
#plot(nimbleMinusBurn[, c("sd_add")], main = "SRER: sd_add (burn-in removed)")
plot(nimbleMinusBurn[, c("b1")], main = paste(siteID, ": b1 (burn-in removed)", sep = ''))
plot(nimbleMinusBurn[, c("b2")], main = paste(siteID, ": b2 (burn-in removed)", sep = ''))
plot(nimbleMinusBurn[, c("b3")], main = paste(siteID, ": b3 (burn-in removed)", sep = ''))
plot(nimbleMinusBurn[, c("b4")], main = paste(siteID, ": b4 (burn-in removed)", sep = ''))
plot(nimbleMinusBurn[, c("sd_add")],
main = paste(siteID, "SRER: sd_add (burn-in removed)", sep = ''))
#Needed below:
#This is temporary. We probably want to eliminate this...
return(nimbleMinusBurn)
}
nimble_burn_TempDLM_BART = PlotChain(siteID = "BART", nimble_out_TempDLM_BART)
nimble_burn_TempDLM_KONZ = PlotChain(siteID = "KONZ", nimble_out_TempDLM_KONZ)
nimble_burn_TempDLM_OSBS = PlotChain(siteID = "OSBS", nimble_out_TempDLM_OSBS)
nimble_burn_TempDLM_SRER = PlotChain(siteID = "SRER", nimble_out_TempDLM_SRER)
Examine Brooks-Gelman-Rubin convergence diagnostic for temperature DLM.
Since the upper confidence interval (C.I.) for the Brooks-Gelman-Rubin convergence diagnostic is (approximately) less than 1.1 for all parameters (depending on the individual run), we can conclude the chains have converged for BART.
# BART
gelman.diag(nimble_burn_TempDLM_BART[, "b1"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1.04 1.12
gelman.diag(nimble_burn_TempDLM_BART[, "b2"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1 1
gelman.diag(nimble_burn_TempDLM_BART[, "b3"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1.01 1.03
gelman.diag(nimble_burn_TempDLM_BART[, "b4"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1 1.01
gelman.diag(nimble_burn_TempDLM_BART[, "sd_add"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1 1
Since the upper confidence interval (C.I.) for the Brooks-Gelman-Rubin convergence diagnostic is (approximately) less than 1.1 for all parameters (depending on the individual run), we can conclude the chains have converged for KONZ.
# KONZ
gelman.diag(nimble_burn_TempDLM_KONZ[, "b1"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1.04 1.13
gelman.diag(nimble_burn_TempDLM_KONZ[, "b2"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1 1.01
gelman.diag(nimble_burn_TempDLM_KONZ[, "b3"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1.01 1.03
gelman.diag(nimble_burn_TempDLM_KONZ[, "b4"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1.06 1.2
gelman.diag(nimble_burn_TempDLM_KONZ[, "sd_add"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1.01 1.03
Since the upper confidence interval (C.I.) for the Brooks-Gelman-Rubin convergence diagnostic is (approximately) less than 1.1 for all parameters (depending on the individual run), we can conclude the chains have converged for OSBS.
# OSBS
gelman.diag(nimble_burn_TempDLM_OSBS[, "b1"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1.03 1.11
gelman.diag(nimble_burn_TempDLM_OSBS[, "b2"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1.02 1.07
gelman.diag(nimble_burn_TempDLM_OSBS[, "b3"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1 1.01
gelman.diag(nimble_burn_TempDLM_OSBS[, "b4"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1.01 1.02
gelman.diag(nimble_burn_TempDLM_OSBS[, "sd_add"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1.07 1.23
Since the upper confidence interval (C.I.) for the Brooks-Gelman-Rubin convergence diagnostic is (approximately) less than 1.1 for all parameters (depending on the individual run), we can conclude the chains have converged for SRER.
# SRER
gelman.diag(nimble_burn_TempDLM_SRER[, "b1"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1 1
gelman.diag(nimble_burn_TempDLM_SRER[, "b2"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1 1
gelman.diag(nimble_burn_TempDLM_SRER[, "b3"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1.01 1.02
gelman.diag(nimble_burn_TempDLM_SRER[, "b4"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1.01 1.04
gelman.diag(nimble_burn_TempDLM_SRER[, "sd_add"])
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1 1.01
Convert temperature DLM output using tidybayes.
# BART
chain_TempDLM_BART <- nimble_burn_TempDLM_BART %>%
spread_draws(y[day], x[day], b1, b2, b3, b4, sd_add)
# KONZ
chain_TempDLM_KONZ <- nimble_burn_TempDLM_KONZ %>%
spread_draws(y[day], x[day], b1, b2, b3, b4, sd_add)
# OSBS
chain_TempDLM_OSBS <- nimble_burn_TempDLM_OSBS %>%
spread_draws(y[day], x[day], b1, b2, b3, b4, sd_add)
# SRER
chain_TempDLM_SRER <- nimble_burn_TempDLM_SRER %>%
spread_draws(y[day], x[day], b1, b2, b3, b4, sd_add)
Plot temperature DLM versus observations.
# plot_colors <- c("#F8766D", "#7CAE00", "#00BFC4", "#C77CFF")
#
# # BART
#
# plot_chain_TempDLM_BART <- chain_TempDLM_BART %>%
# group_by(day) %>%
# summarise(mean = mean(x),
# lower = quantile(x, 0.025),
# upper = quantile(x, 0.975),
# .groups = "drop") %>%
# mutate(time = target_data_BART_sub$time)
#
# plot_TempDLM_BART <- ggplot(data = plot_chain_TempDLM_BART, aes(x = time, y = mean)) +
# geom_line() +
# geom_ribbon(aes(ymin = lower, ymax = upper),
# alpha = 0.2,
# color = plot_colors[1],
# fill = plot_colors[1]) +
# geom_point(data = target_data_BART_sub, aes(x = time, y = nee), color = "black") +
# labs(title = paste0("Site: BART | Time Period: ", substr(target_data_BART_sub$time[1], 1, 10), " to ", substr(target_data_BART_sub$time[subset_length_timesteps], 1, 10)),
# x = "Time (UTC)",
# y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
# theme_bw()
#
# # KONZ
#
# plot_chain_TempDLM_KONZ <- chain_TempDLM_KONZ %>%
# group_by(day) %>%
# summarise(mean = mean(x),
# lower = quantile(x, 0.025),
# upper = quantile(x, 0.975),
# .groups = "drop") %>%
# mutate(time = target_data_KONZ_sub$time)
#
# plot_TempDLM_KONZ <- ggplot(data = plot_chain_TempDLM_KONZ, aes(x = time, y = mean)) +
# geom_line() +
# geom_ribbon(aes(ymin = lower, ymax = upper),
# alpha = 0.2,
# color = plot_colors[2],
# fill = plot_colors[2]) +
# geom_point(data = target_data_KONZ_sub, aes(x = time, y = nee), color = "black") +
# labs(title = paste0("Site: KONZ | Time Period: ", substr(target_data_KONZ_sub$time[1], 1, 10), " to ", substr(target_data_KONZ_sub$time[subset_length_timesteps], 1, 10)),
# x = "Time (UTC)",
# y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
# theme_bw()
#
# # OSBS
#
# plot_chain_TempDLM_OSBS <- chain_TempDLM_OSBS %>%
# group_by(day) %>%
# summarise(mean = mean(x),
# lower = quantile(x, 0.025),
# upper = quantile(x, 0.975),
# .groups = "drop") %>%
# mutate(time = target_data_OSBS_sub$time)
#
# plot_TempDLM_OSBS <- ggplot(data = plot_chain_TempDLM_OSBS, aes(x = time, y = mean)) +
# geom_line() +
# geom_ribbon(aes(ymin = lower, ymax = upper),
# alpha = 0.2,
# color = plot_colors[3],
# fill = plot_colors[3]) +
# geom_point(data = target_data_OSBS_sub, aes(x = time, y = nee), color = "black") +
# labs(title = paste0("Site: OSBS | Time Period: ", substr(target_data_OSBS_sub$time[1], 1, 10), " to ", substr(target_data_OSBS_sub$time[subset_length_timesteps], 1, 10)),
# x = "Time (UTC)",
# y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
# theme_bw()
#
# # SRER
#
# plot_chain_TempDLM_SRER <- chain_TempDLM_SRER %>%
# group_by(day) %>%
# summarise(mean = mean(x),
# lower = quantile(x, 0.025),
# upper = quantile(x, 0.975),
# .groups = "drop") %>%
# mutate(time = target_data_SRER_sub$time)
#
# plot_TempDLM_SRER <- ggplot(data = plot_chain_TempDLM_SRER, aes(x = time, y = mean)) +
# geom_line() +
# geom_ribbon(aes(ymin = lower, ymax = upper),
# alpha = 0.2,
# color = plot_colors[4],
# fill = plot_colors[4]) +
# geom_point(data = target_data_SRER_sub, aes(x = time, y = nee), color = "black") +
# labs(title = paste0("Site: SRER | Time Period: ", substr(target_data_SRER_sub$time[1], 1, 10), " to ", substr(target_data_SRER_sub$time[subset_length_timesteps], 1, 10)),
# x = "Time (UTC)",
# y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
# theme_bw()
#
# plot_TempDLM_BART
# plot_TempDLM_KONZ
# plot_TempDLM_OSBS
# plot_TempDLM_SRER
#Reorganized code:
#Untested because I can't connect to https://data.ecoforecast.org/ right now
plot_colors = c("BART" = "#F8766D", "KONZ" = "#7CAE00", "OSBS" = "#00BFC4", "SRER" = "#C77CFF")
#Perform statistical summaries on a chain:
#We do this outside PlotTemperatureDLMvsObs() because the result is used again below.
GetChainStats <- function(target_data_sub, chain_TempDLM)
{
chainStatsSummary <- chain_TempDLM %>%
group_by(day) %>%
summarise(mean = mean(x),
lower = quantile(x, 0.025),
upper = quantile(x, 0.975),
.groups = "drop") %>%
mutate(time = target_data_sub$time)
return(chainStatsSummary)
}
#The argument order would be better reversed.
#See PlotObsAndForecast() below, which is similar.
PlotTemperatureDLMvsObs <- function(siteID, plot_chain_TempDLM, target_data_sub)
{
#plot_chain_TempDLM_SRER = plot_chain_TempDLM
#target_data_SRER_sub -> target_data_sub
plot = ggplot(plot_chain_TempDLM, aes(x = time, y = mean)) +
geom_line() +
geom_ribbon(aes(ymin = lower, ymax = upper),
alpha = 0.2,
color = plot_colors[siteID],
fill = plot_colors[siteID]) +
geom_point(data = target_data_sub, aes(x = time, y = nee), color = "black") +
labs(title = paste("Site:", siteID, "| Time Period: ", substr(target_data_sub$time[1], 1, 10),
"to", substr(target_data_sub$time[subset_length_timesteps], 1, 10)),
x = "Time (UTC)",
y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
theme_bw()
print(plot)
}
plot_chain_TempDLM_BART = GetChainStats(target_data_BART_sub, chain_TempDLM_BART)
PlotTemperatureDLMvsObs(site = "BART", plot_chain_TempDLM_BART, target_data_BART_sub)
plot_chain_TempDLM_KONZ = GetChainStats(target_data_KONZ_sub, chain_TempDLM_KONZ)
PlotTemperatureDLMvsObs(site = "KONZ", plot_chain_TempDLM_KONZ, target_data_KONZ_sub)
plot_chain_TempDLM_OSBS = GetChainStats(target_data_OSBS_sub, chain_TempDLM_OSBS)
PlotTemperatureDLMvsObs(site = "OSBS", plot_chain_TempDLM_OSBS, target_data_OSBS_sub)
plot_chain_TempDLM_SRER = GetChainStats(target_data_SRER_sub, chain_TempDLM_SRER)
PlotTemperatureDLMvsObs(site = "SRER", plot_chain_TempDLM_SRER, target_data_SRER_sub)
Generate forecasts.
# Forecast preliminaries
ensemble_size <- 1000 # Specify ensemble size
forecast_length <- 35 * 24 * 2 + 1 # Number of time steps in forecast
NOAA_temp_forecast <- read.csv(text = getURL("https://raw.githubusercontent.com/eco4cast-class-VT/VT_NEET/master/temp_fc30_april.csv")) # Read in NOAA GEFS forecasts
Generate forecast for BART.
# Sample from temperature forecasts
NOAA_temp_forecast_site <- filter(NOAA_temp_forecast, siteID == "BART") # Select NOAA forecasts for BART
temp_ensembles <- sample(unique(NOAA_temp_forecast_site$ensemble),
size = ensemble_size,
replace = TRUE)
forecast_inSW <- matrix(NA, nrow = forecast_length, ncol = ensemble_size) # Initialize forecast incoming SW radiation matrix
forecast_temp <- matrix(NA, nrow = forecast_length, ncol = ensemble_size) # Initialize forecast temperature matrix
for(i in 1:ensemble_size){
forecast_inSW[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$inSW/sd_inSW_data_BART
forecast_temp[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$tempC/sd_temp_data_BART
}
# Sample from parameter posterior distribution
chain_TempDLM_last_time_step <- filter(chain_TempDLM_BART, # Create tibble for MCMC results for last time step
day == max(chain_TempDLM_BART$day))
posterior_sample_indices <- sample(x = 1:length(chain_TempDLM_last_time_step$x),
size = ensemble_size,
replace = TRUE)
NEE_ic <- chain_TempDLM_last_time_step$x[posterior_sample_indices] # Extract last latent state value
b1 <- chain_TempDLM_last_time_step$b1[posterior_sample_indices] # Extract sampled b1 values
b2 <- chain_TempDLM_last_time_step$b2[posterior_sample_indices] # Extract sampled b2 values
b3 <- chain_TempDLM_last_time_step$b3[posterior_sample_indices] # Extract sampled b3 values
b4 <- chain_TempDLM_last_time_step$b4[posterior_sample_indices] # Extract sampled b4 values
sd_add <- chain_TempDLM_last_time_step$sd_add[posterior_sample_indices] # Extract sampled sd_add values
# Specify initial conditions
NEE_state <- matrix(NA, nrow = forecast_length, ncol = ensemble_size) # Initialize state variable matrix
NEE_obs <- matrix(NA, nrow = forecast_length, ncol = ensemble_size) # Initialize observed NEE matrix
NEE_state[1, ] <- NEE_ic # Specify initial condition for state variable
# Generate forecast
for(m in 1:ensemble_size){
for(t in 1:forecast_length){
if(t > 1){ # Condition set b/c initial condition already specified for state variable
# Note:
# NEE_state = real NEE (state variable)
# NEE_obs = observed NEE
pred <- b1[m] * NEE_state[t - 1, m] + b2[m] * (forecast_inSW[t, m] - forecast_inSW[t - 1, m]) + b3[m] * (forecast_temp[t, m] - forecast_temp[t - 1, m]) + b4[m]
NEE_state[t, m] <- rnorm(1, mean = pred, sd = sd_add[m])
}
if(NEE_state[t, m] >= 0){ # Calculate sd_obs if NEE is positive
sd_obs <- target_data_BART_sub$nee_sd_intercept[1] + target_data_BART_sub$nee_sd_slopeP[1] * NEE_state[t, m] # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
}else{ # Calculate sd_obs if NEE is negative
sd_obs <- target_data_BART_sub$nee_sd_intercept[1] + target_data_BART_sub$nee_sd_slopeN[1] * NEE_state[t, m]
}
NEE_obs[t, m] <- rnorm(1, mean = NEE_state[t, m], sd = sd_obs * sd_obs_mod_BART)
}
}
nee <- as.vector(t(NEE_obs)) # Convert matrix to vector for long format (transposed so that vector is organized so that each ensemble member is nested within each time step)
time <- rep(NA, length(nee)) # Initialize time vector
NOAA_forecast_times <- unique(NOAA_temp_forecast_site$time) # Extract forecast times
# Assign forecast times
for(i in 1:forecast_length){
time[((i*ensemble_size) - (ensemble_size - 1)):(i*ensemble_size)] <- NOAA_forecast_times[i]
}
siteID <- rep("BART", length.out = length(nee)) # Create siteID vector
ensemble <- rep(seq(from = 1, to = ensemble_size, by = 1), times = forecast_length) # Create ensemble vector
forecast <- rep(1, length.out = length(nee)) # Create forecast vector
data_assimilation <- rep(0, length.out = length(nee)) # Create forecast vector
le <- rep(NA, length.out = length(nee)) # Create le vector
vswc <- rep(NA, length.out = length(nee)) # Create vswc vector
NEE_forecast_BART <- data.frame(time, siteID, ensemble, forecast, data_assimilation, nee, le, vswc) # Create data frame with forecast data
NEE_forecast_BART$time <- ymd_hms(NEE_forecast_BART$time, tz = "UTC") # Specify time column as POSIXct
Generate forecast for KONZ.
# Sample from temperature forecasts
NOAA_temp_forecast_site <- filter(NOAA_temp_forecast, siteID == "KONZ") # Select NOAA forecasts for KONZ
temp_ensembles <- sample(unique(NOAA_temp_forecast_site$ensemble),
size = ensemble_size,
replace = TRUE)
forecast_inSW <- matrix(NA, nrow = forecast_length, ncol = ensemble_size) # Initialize forecast incoming SW radiation matrix
forecast_temp <- matrix(NA, nrow = forecast_length, ncol = ensemble_size) # Initialize forecast temperature matrix
for(i in 1:ensemble_size){
forecast_inSW[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$inSW/sd_inSW_data_KONZ
forecast_temp[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$tempC/sd_temp_data_KONZ
}
# Sample from parameter posterior distribution
chain_TempDLM_last_time_step <- filter(chain_TempDLM_KONZ, # Create tibble for MCMC results for last time step
day == max(chain_TempDLM_KONZ$day))
posterior_sample_indices <- sample(x = 1:length(chain_TempDLM_last_time_step$x),
size = ensemble_size,
replace = TRUE)
NEE_ic <- chain_TempDLM_last_time_step$x[posterior_sample_indices] # Extract last latent state value
b1 <- chain_TempDLM_last_time_step$b1[posterior_sample_indices] # Extract sampled b1 values
b2 <- chain_TempDLM_last_time_step$b2[posterior_sample_indices] # Extract sampled b2 values
b3 <- chain_TempDLM_last_time_step$b3[posterior_sample_indices] # Extract sampled b3 values
b4 <- chain_TempDLM_last_time_step$b4[posterior_sample_indices] # Extract sampled b4 values
sd_add <- chain_TempDLM_last_time_step$sd_add[posterior_sample_indices] # Extract sampled sd_add values
# Specify initial conditions
NEE_state <- matrix(NA, nrow = forecast_length, ncol = ensemble_size) # Initialize state variable matrix
NEE_obs <- matrix(NA, nrow = forecast_length, ncol = ensemble_size) # Initialize observed NEE matrix
NEE_state[1, ] <- NEE_ic # Specify initial condition for state variable
# Generate forecast
for(m in 1:ensemble_size){
for(t in 1:forecast_length){
if(t > 1){ # Condition set b/c initial condition already specified for state variable
# Note:
# NEE_state = real NEE (state variable)
# NEE_obs = observed NEE
pred <- b1[m] * NEE_state[t - 1, m] + b2[m] * (forecast_inSW[t, m] - forecast_inSW[t - 1, m]) + b3[m] * (forecast_temp[t, m] - forecast_temp[t - 1, m]) + b4[m]
NEE_state[t, m] <- rnorm(1, mean = pred, sd = sd_add[m])
}
if(NEE_state[t, m] >= 0){ # Calculate sd_obs if NEE is positive
sd_obs <- target_data_KONZ_sub$nee_sd_intercept[1] + target_data_KONZ_sub$nee_sd_slopeP[1] * NEE_state[t, m] # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
}else{ # Calculate sd_obs if NEE is negative
sd_obs <- target_data_KONZ_sub$nee_sd_intercept[1] + target_data_KONZ_sub$nee_sd_slopeN[1] * NEE_state[t, m]
}
NEE_obs[t, m] <- rnorm(1, mean = NEE_state[t, m], sd = sd_obs * sd_obs_mod_KONZ)
}
}
nee <- as.vector(t(NEE_obs)) # Convert matrix to vector for long format (transposed so that vector is organized so that each ensemble member is nested within each time step)
time <- rep(NA, length(nee)) # Initialize time vector
NOAA_forecast_times <- unique(NOAA_temp_forecast_site$time) # Extract forecast times
# Assign forecast times
for(i in 1:forecast_length){
time[((i*ensemble_size) - (ensemble_size - 1)):(i*ensemble_size)] <- NOAA_forecast_times[i]
}
siteID <- rep("KONZ", length.out = length(nee)) # Create siteID vector
ensemble <- rep(seq(from = 1, to = ensemble_size, by = 1), times = forecast_length) # Create ensemble vector
forecast <- rep(1, length.out = length(nee)) # Create forecast vector
data_assimilation <- rep(0, length.out = length(nee)) # Create forecast vector
le <- rep(NA, length.out = length(nee)) # Create le vector
vswc <- rep(NA, length.out = length(nee)) # Create vswc vector
NEE_forecast_KONZ <- data.frame(time, siteID, ensemble, forecast, data_assimilation, nee, le, vswc) # Create data frame with forecast data
NEE_forecast_KONZ$time <- ymd_hms(NEE_forecast_KONZ$time, tz = "UTC") # Specify time column as POSIXct
Generate forecast for OSBS.
# Sample from temperature forecasts
NOAA_temp_forecast_site <- filter(NOAA_temp_forecast, siteID == "OSBS") # Select NOAA forecasts for OSBS
temp_ensembles <- sample(unique(NOAA_temp_forecast_site$ensemble),
size = ensemble_size,
replace = TRUE)
forecast_inSW <- matrix(NA, nrow = forecast_length, ncol = ensemble_size) # Initialize forecast incoming SW radiation matrix
forecast_temp <- matrix(NA, nrow = forecast_length, ncol = ensemble_size) # Initialize forecast temperature matrix
for(i in 1:ensemble_size){
forecast_inSW[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$inSW/sd_inSW_data_OSBS
forecast_temp[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$tempC/sd_temp_data_OSBS
}
# Sample from parameter posterior distribution
chain_TempDLM_last_time_step <- filter(chain_TempDLM_OSBS, # Create tibble for MCMC results for last time step
day == max(chain_TempDLM_OSBS$day))
posterior_sample_indices <- sample(x = 1:length(chain_TempDLM_last_time_step$x),
size = ensemble_size,
replace = TRUE)
NEE_ic <- chain_TempDLM_last_time_step$x[posterior_sample_indices] # Extract last latent state value
b1 <- chain_TempDLM_last_time_step$b1[posterior_sample_indices] # Extract sampled b1 values
b2 <- chain_TempDLM_last_time_step$b2[posterior_sample_indices] # Extract sampled b2 values
b3 <- chain_TempDLM_last_time_step$b3[posterior_sample_indices] # Extract sampled b3 values
b4 <- chain_TempDLM_last_time_step$b4[posterior_sample_indices] # Extract sampled b4 values
sd_add <- chain_TempDLM_last_time_step$sd_add[posterior_sample_indices] # Extract sampled sd_add values
# Specify initial conditions
NEE_state <- matrix(NA, nrow = forecast_length, ncol = ensemble_size) # Initialize state variable matrix
NEE_obs <- matrix(NA, nrow = forecast_length, ncol = ensemble_size) # Initialize observed NEE matrix
NEE_state[1, ] <- NEE_ic # Specify initial condition for state variable
# Generate forecast
for(m in 1:ensemble_size){
for(t in 1:forecast_length){
if(t > 1){ # Condition set b/c initial condition already specified for state variable
# Note:
# NEE_state = real NEE (state variable)
# NEE_obs = observed NEE
pred <- b1[m] * NEE_state[t - 1, m] + b2[m] * (forecast_inSW[t, m] - forecast_inSW[t - 1, m]) + b3[m] * (forecast_temp[t, m] - forecast_temp[t - 1, m]) + b4[m]
NEE_state[t, m] <- rnorm(1, mean = pred, sd = sd_add[m])
}
if(NEE_state[t, m] >= 0){ # Calculate sd_obs if NEE is positive
sd_obs <- target_data_OSBS_sub$nee_sd_intercept[1] + target_data_OSBS_sub$nee_sd_slopeP[1] * NEE_state[t, m] # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
}else{ # Calculate sd_obs if NEE is negative
sd_obs <- target_data_OSBS_sub$nee_sd_intercept[1] + target_data_OSBS_sub$nee_sd_slopeN[1] * NEE_state[t, m]
}
NEE_obs[t, m] <- rnorm(1, mean = NEE_state[t, m], sd = sd_obs * sd_obs_mod_OSBS)
}
}
nee <- as.vector(t(NEE_obs)) # Convert matrix to vector for long format (transposed so that vector is organized so that each ensemble member is nested within each time step)
time <- rep(NA, length(nee)) # Initialize time vector
NOAA_forecast_times <- unique(NOAA_temp_forecast_site$time) # Extract forecast times
# Assign forecast times
for(i in 1:forecast_length){
time[((i*ensemble_size) - (ensemble_size - 1)):(i*ensemble_size)] <- NOAA_forecast_times[i]
}
siteID <- rep("OSBS", length.out = length(nee)) # Create siteID vector
ensemble <- rep(seq(from = 1, to = ensemble_size, by = 1), times = forecast_length) # Create ensemble vector
forecast <- rep(1, length.out = length(nee)) # Create forecast vector
data_assimilation <- rep(0, length.out = length(nee)) # Create forecast vector
le <- rep(NA, length.out = length(nee)) # Create le vector
vswc <- rep(NA, length.out = length(nee)) # Create vswc vector
NEE_forecast_OSBS <- data.frame(time, siteID, ensemble, forecast, data_assimilation, nee, le, vswc) # Create data frame with forecast data
NEE_forecast_OSBS$time <- ymd_hms(NEE_forecast_OSBS$time, tz = "UTC") # Specify time column as POSIXct
Generate forecast for SRER.
# Sample from temperature forecasts
NOAA_temp_forecast_site <- filter(NOAA_temp_forecast, siteID == "SRER") # Select NOAA forecasts for SRER
temp_ensembles <- sample(unique(NOAA_temp_forecast_site$ensemble),
size = ensemble_size,
replace = TRUE)
forecast_inSW <- matrix(NA, nrow = forecast_length, ncol = ensemble_size) # Initialize forecast incoming SW radiation matrix
forecast_temp <- matrix(NA, nrow = forecast_length, ncol = ensemble_size) # Initialize forecast temperature matrix
for(i in 1:ensemble_size){
forecast_inSW[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$inSW/sd_inSW_data_SRER
forecast_temp[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$tempC/sd_temp_data_SRER
}
# Sample from parameter posterior distribution
chain_TempDLM_last_time_step <- filter(chain_TempDLM_SRER, # Create tibble for MCMC results for last time step
day == max(chain_TempDLM_SRER$day))
posterior_sample_indices <- sample(x = 1:length(chain_TempDLM_last_time_step$x),
size = ensemble_size,
replace = TRUE)
NEE_ic <- chain_TempDLM_last_time_step$x[posterior_sample_indices] # Extract last latent state value
b1 <- chain_TempDLM_last_time_step$b1[posterior_sample_indices] # Extract sampled b1 values
b2 <- chain_TempDLM_last_time_step$b2[posterior_sample_indices] # Extract sampled b2 values
b3 <- chain_TempDLM_last_time_step$b3[posterior_sample_indices] # Extract sampled b3 values
b4 <- chain_TempDLM_last_time_step$b4[posterior_sample_indices] # Extract sampled b4 values
sd_add <- chain_TempDLM_last_time_step$sd_add[posterior_sample_indices] # Extract sampled sd_add values
# Specify initial conditions
NEE_state <- matrix(NA, nrow = forecast_length, ncol = ensemble_size) # Initialize state variable matrix
NEE_obs <- matrix(NA, nrow = forecast_length, ncol = ensemble_size) # Initialize observed NEE matrix
NEE_state[1, ] <- NEE_ic # Specify initial condition for state variable
# Generate forecast
for(m in 1:ensemble_size){
for(t in 1:forecast_length){
if(t > 1){ # Condition set b/c initial condition already specified for state variable
# Note:
# NEE_state = real NEE (state variable)
# NEE_obs = observed NEE
pred <- b1[m] * NEE_state[t - 1, m] + b2[m] * (forecast_inSW[t, m] - forecast_inSW[t - 1, m]) + b3[m] * (forecast_temp[t, m] - forecast_temp[t - 1, m]) + b4[m]
NEE_state[t, m] <- rnorm(1, mean = pred, sd = sd_add[m])
}
if(NEE_state[t, m] >= 0){ # Calculate sd_obs if NEE is positive
sd_obs <- target_data_SRER_sub$nee_sd_intercept[1] + target_data_SRER_sub$nee_sd_slopeP[1] * NEE_state[t, m] # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
}else{ # Calculate sd_obs if NEE is negative
sd_obs <- target_data_SRER_sub$nee_sd_intercept[1] + target_data_SRER_sub$nee_sd_slopeN[1] * NEE_state[t, m]
}
NEE_obs[t, m] <- rnorm(1, mean = NEE_state[t, m], sd = sd_obs * sd_obs_mod_SRER)
}
}
nee <- as.vector(t(NEE_obs)) # Convert matrix to vector for long format (transposed so that vector is organized so that each ensemble member is nested within each time step)
time <- rep(NA, length(nee)) # Initialize time vector
NOAA_forecast_times <- unique(NOAA_temp_forecast_site$time) # Extract forecast times
# Assign forecast times
for(i in 1:forecast_length){
time[((i*ensemble_size) - (ensemble_size - 1)):(i*ensemble_size)] <- NOAA_forecast_times[i]
}
siteID <- rep("SRER", length.out = length(nee)) # Create siteID vector
ensemble <- rep(seq(from = 1, to = ensemble_size, by = 1), times = forecast_length) # Create ensemble vector
forecast <- rep(1, length.out = length(nee)) # Create forecast vector
data_assimilation <- rep(0, length.out = length(nee)) # Create forecast vector
le <- rep(NA, length.out = length(nee)) # Create le vector
vswc <- rep(NA, length.out = length(nee)) # Create vswc vector
NEE_forecast_SRER <- data.frame(time, siteID, ensemble, forecast, data_assimilation, nee, le, vswc) # Create data frame with forecast data
NEE_forecast_SRER$time <- ymd_hms(NEE_forecast_SRER$time, tz = "UTC") # Specify time column as POSIXct
Compile forecasts and write output to .csv file.
NEE_forecast <- rbind(NEE_forecast_BART,
NEE_forecast_KONZ,
NEE_forecast_OSBS,
NEE_forecast_SRER)
write.csv(NEE_forecast, file = "terrestrial_30min-2021-04-01-VT_NEET.csv", row.names = FALSE)
Plot forecasts (with fitted model and observations).
# BART
# plot_NEE_forecast_BART <- NEE_forecast_BART %>%
# group_by(time) %>%
# summarise(mean = mean(nee),
# lower = quantile(nee, 0.025),
# upper = quantile(nee, 0.975),
# .groups = "drop")
#
# plot_TempDLM_BART <- ggplot() +
# geom_line(data = plot_chain_TempDLM_BART, aes(x = time, y = mean), color = "black") +
# geom_line(data = plot_NEE_forecast_BART, aes(x = time, y = mean), color = "black") +
# geom_ribbon(data = plot_chain_TempDLM_BART, aes(x = time, ymin = lower, ymax = upper, color = "1", fill = "1"),
# alpha = 0.2) +
# geom_ribbon(data = plot_NEE_forecast_BART, aes(x = time, ymin = lower, ymax = upper, color = "2", fill = "2"),
# alpha = 0.2) +
# geom_point(data = target_data_BART_sub, aes(x = time, y = nee), color = "black") +
# scale_fill_manual(name = "Period", values = c("gray", plot_colors[1]), labels = c("Historical", "Forecast")) +
# scale_color_manual(name = "Period", values = c("gray", plot_colors[1]), labels = c("Historical", "Forecast")) +
# labs(title = paste0("Site: BART | Time Period: ", substr(plot_chain_TempDLM_BART$time[1], 1, 10), " to ", substr(plot_NEE_forecast_BART$time[length(plot_NEE_forecast_BART$time)], 1, 10)),
# x = "Time (UTC)",
# y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
# theme_bw()
#
# # KONZ
#
# plot_NEE_forecast_KONZ <- NEE_forecast_KONZ %>%
# group_by(time) %>%
# summarise(mean = mean(nee),
# lower = quantile(nee, 0.025),
# upper = quantile(nee, 0.975),
# .groups = "drop")
#
# plot_TempDLM_KONZ <- ggplot() +
# geom_line(data = plot_chain_TempDLM_KONZ, aes(x = time, y = mean), color = "black") +
# geom_line(data = plot_NEE_forecast_KONZ, aes(x = time, y = mean), color = "black") +
# geom_ribbon(data = plot_chain_TempDLM_KONZ, aes(x = time, ymin = lower, ymax = upper, color = "1", fill = "1"),
# alpha = 0.2) +
# geom_ribbon(data = plot_NEE_forecast_KONZ, aes(x = time, ymin = lower, ymax = upper, color = "2", fill = "2"),
# alpha = 0.2) +
# geom_point(data = target_data_KONZ_sub, aes(x = time, y = nee), color = "black") +
# scale_fill_manual(name = "Period", values = c("gray", plot_colors[2]), labels = c("Historical", "Forecast")) +
# scale_color_manual(name = "Period", values = c("gray", plot_colors[2]), labels = c("Historical", "Forecast")) +
# labs(title = paste0("Site: KONZ | Time Period: ", substr(plot_chain_TempDLM_KONZ$time[1], 1, 10), " to ", substr(plot_NEE_forecast_KONZ$time[length(plot_NEE_forecast_KONZ$time)], 1, 10)),
# x = "Time (UTC)",
# y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
# theme_bw()
#
# # OSBS
#
# plot_NEE_forecast_OSBS <- NEE_forecast_OSBS %>%
# group_by(time) %>%
# summarise(mean = mean(nee),
# lower = quantile(nee, 0.025),
# upper = quantile(nee, 0.975),
# .groups = "drop")
#
# plot_TempDLM_OSBS <- ggplot() +
# geom_line(data = plot_chain_TempDLM_OSBS, aes(x = time, y = mean), color = "black") +
# geom_line(data = plot_NEE_forecast_OSBS, aes(x = time, y = mean), color = "black") +
# geom_ribbon(data = plot_chain_TempDLM_OSBS, aes(x = time, ymin = lower, ymax = upper, color = "1", fill = "1"),
# alpha = 0.2) +
# geom_ribbon(data = plot_NEE_forecast_OSBS, aes(x = time, ymin = lower, ymax = upper, color = "2", fill = "2"),
# alpha = 0.2) +
# geom_point(data = target_data_OSBS_sub, aes(x = time, y = nee), color = "black") +
# scale_fill_manual(name = "Period", values = c("gray", plot_colors[3]), labels = c("Historical", "Forecast")) +
# scale_color_manual(name = "Period", values = c("gray", plot_colors[3]), labels = c("Historical", "Forecast")) +
# labs(title = paste0("Site: OSBS | Time Period: ", substr(plot_chain_TempDLM_OSBS$time[1], 1, 10), " to ", substr(plot_NEE_forecast_OSBS$time[length(plot_NEE_forecast_OSBS$time)], 1, 10)),
# x = "Time (UTC)",
# y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
# theme_bw()
#
# # SRER
#
# plot_NEE_forecast_SRER <- NEE_forecast_SRER %>%
# group_by(time) %>%
# summarise(mean = mean(nee),
# lower = quantile(nee, 0.025),
# upper = quantile(nee, 0.975),
# .groups = "drop")
#
# plot_TempDLM_SRER <- ggplot() +
# geom_line(data = plot_chain_TempDLM_SRER, aes(x = time, y = mean), color = "black") +
# geom_line(data = plot_NEE_forecast_SRER, aes(x = time, y = mean), color = "black") +
# geom_ribbon(data = plot_chain_TempDLM_SRER, aes(x = time, ymin = lower, ymax = upper, color = "1", fill = "1"),
# alpha = 0.2) +
# geom_ribbon(data = plot_NEE_forecast_SRER, aes(x = time, ymin = lower, ymax = upper, color = "2", fill = "2"),
# alpha = 0.2) +
# geom_point(data = target_data_SRER_sub, aes(x = time, y = nee), color = "black") +
# scale_fill_manual(name = "Period", values = c("gray", plot_colors[4]), labels = c("Historical", "Forecast")) +
# scale_color_manual(name = "Period", values = c("gray", plot_colors[4]), labels = c("Historical", "Forecast")) +
# labs(title = paste0("Site: SRER | Time Period: ", substr(plot_chain_TempDLM_SRER$time[1], 1, 10), " to ", substr(plot_NEE_forecast_SRER$time[length(plot_NEE_forecast_SRER$time)], 1, 10)),
# x = "Time (UTC)",
# y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
# theme_bw()
#Plot observations and forecast for a given location:
PlotObsAndForecast <- function(siteID, nee_forecast, plot_chain_TempDLM, target_data_sub)
{
#NEE_forecast_SRER = nee_forecast
#plot_chain_TempDLM_SRER = plot_chain_TempDLM
#plot_NEE_forecast_SRER = plot_NEE_forecast
#target_data_SRER_sub = target_data_sub
plot_NEE_forecast <- nee_forecast %>%
group_by(time) %>%
summarise(mean = mean(nee),
lower = quantile(nee, 0.025),
upper = quantile(nee, 0.975),
.groups = "drop")
title = paste0("Site: ", siteID, " | Time Period: ", substr(plot_chain_TempDLM$time[1], 1, 10), " to ",
substr(plot_NEE_forecast$time[length(plot_NEE_forecast$time)], 1, 10))
plot = ggplot() +
geom_line(data = plot_chain_TempDLM, aes(x = time, y = mean), color = "black") +
geom_line(data = plot_NEE_forecast, aes(x = time, y = mean), color = "black") +
geom_ribbon(data = plot_chain_TempDLM,
aes(x = time, ymin = lower, ymax = upper, color = "1", fill = "1"), alpha = 0.2) +
geom_ribbon(data = plot_NEE_forecast,
aes(x = time, ymin = lower, ymax = upper, color = "2", fill = "2"), alpha = 0.2) +
geom_point(data = target_data_sub, aes(x = time, y = nee), color = "black") +
scale_fill_manual(name = "Period", values = c("gray", plot_colors[siteID]),
labels = c("Historical", "Forecast")) +
scale_color_manual(name = "Period", values = c("gray", plot_colors[siteID]),
labels = c("Historical", "Forecast")) +
labs(title = title, x = "Time (UTC)",
y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
theme_bw()
print(plot)
}
#plot_TempDLM_BART
#plot_TempDLM_KONZ
#plot_TempDLM_OSBS
#plot_TempDLM_SRER
PlotObsAndForecast(site = "BART", NEE_forecast_BART, plot_chain_TempDLM_BART, target_data_BART_sub)
PlotObsAndForecast(site = "KONZ", NEE_forecast_KONZ, plot_chain_TempDLM_KONZ, target_data_KONZ_sub)
PlotObsAndForecast(site = "OSBS", NEE_forecast_OSBS, plot_chain_TempDLM_OSBS, target_data_OSBS_sub)
PlotObsAndForecast(site = "SRER", NEE_forecast_SRER, plot_chain_TempDLM_SRER, target_data_SRER_sub)